import numpy as np
import collections
from scipy.stats import entropy

class Tweet_Word_List(object):
    """Bag-of-words view of one article, expressed against a shared vocabulary.

    Attributes created by ``set_word_list``:
        article_words_list: vocabulary positions of the article's distinct
            words, in order of first appearance in the article.
        article_words_weight: how many times each of those words occurs.
        article_words_weight_normalized: the counts divided by their total,
            as a ``float32`` NumPy array.
    """

    def __init__(self, article):
        """Keep the raw article text; nothing is tokenised yet."""
        self.article = article

    def set_word_list(self, total_word_list):
        """Count the article's words and map them onto ``total_word_list``.

        The article is split on whitespace. Each distinct token is looked up
        with ``total_word_list.index(token)``.

        Args:
            total_word_list: object exposing ``index(word)``; presumably the
                list of all known words (confirm against the caller).

        Raises:
            ValueError: from ``list.index`` when ``total_word_list`` is a
                list and a token is not in it.
        """
        occurrences = collections.Counter(self.article.split())

        # Bind the attributes first so the lists are filled incrementally.
        positions = self.article_words_list = []
        counts = self.article_words_weight = []
        for token in occurrences:
            positions.append(total_word_list.index(token))
            counts.append(occurrences[token])

        counts_f32 = np.array(counts, dtype=np.float32)
        self.article_words_weight_normalized = counts_f32 / sum(counts)


def _KL_div(a, b):
    """Return the Kullback-Leibler divergence D(a || b) in nats.

    Thin wrapper around ``scipy.stats.entropy``, which rescales ``a`` and
    ``b`` to sum to 1 (along axis 0) before computing the relative entropy.
    """
    return entropy(pk=a, qk=b)


def JS_div(a, b):
    """Return the Jensen-Shannon divergence between ``a`` and ``b`` in nats.

    Both inputs are converted to float arrays and normalised along axis 0
    before the mixture is formed, so unnormalised weights (for example raw
    counts) and plain Python sequences are handled correctly. Inputs that
    already sum to 1 give the same result as before. The inputs are not
    modified.

    Args:
        a: array-like of non-negative weights.
        b: array-like of non-negative weights, broadcastable with ``a``.

    Returns:
        The divergence, between 0 and ln(2); an array when the inputs have
        more than one dimension (one value per column along axis 0).
    """
    a = np.asarray(a, dtype=np.float64)
    b = np.asarray(b, dtype=np.float64)
    # entropy() normalises each argument independently, so the mixture must
    # be built from already-normalised distributions; otherwise the input
    # with the larger total dominates ``mid`` and the result is not JS.
    a = a / a.sum(axis=0, keepdims=True)
    b = b / b.sum(axis=0, keepdims=True)
    mid = (a + b) / 2
    return 0.5 * (_KL_div(a, mid) + _KL_div(b, mid))

def P_star_Symbol(pvalue):
    """Map a p-value to its significance annotation.

    Args:
        pvalue: the p-value to classify.

    Returns:
        ``'ns'`` for p > 0.05 or NaN, ``'*'`` for 0.01 < p <= 0.05,
        ``'**'`` for 0.001 < p <= 0.01, ``'***'`` for 0.0001 < p <= 0.001,
        and ``'****'`` for p <= 0.0001.
    """
    # Written as "not <=" rather than ">" so that NaN (for which every
    # comparison is False) is reported as not significant instead of
    # falling through to the most significant label.
    if not pvalue <= 0.05:
        return 'ns'
    if pvalue > 0.01:
        return '*'
    if pvalue > 0.001:
        return '**'
    if pvalue > 0.0001:
        return '***'
    return '****'